Sklearn

Визуализация данных



In [ ]:

    
from sklearn import datasets

import numpy as np



In [ ]:

    
# Later cells use `pylab`, `pyplot` and `figsize` without importing them, so
# they depend on this magic having populated the interactive namespace.
%pylab inline

Загрузка выборки



In [ ]:

    
# Load the bundled digits dataset; every later cell reads from `digits`.
digits = datasets.load_digits()



In [ ]:

    
# Show the dataset's own description text.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(digits.DESCR)



In [ ]:

    
# Inspect the first sample: its label, its feature vector and the vector length.
# str.format keeps the printed text identical on Python 2 and Python 3
# (a multi-argument print(...) would print a tuple on Python 2).
print('target: {}'.format(digits.target[0]))
print('features: \n{}'.format(digits.data[0]))
print('number of features: {}'.format(len(digits.data[0])))

Визуализация объектов выборки



In [ ]:

    
# Deliberate failure demo: digits.data[0] is a 1-D feature vector (later cells
# reshape it to 8x8), so imshow rejects it with "Invalid dimensions for image
# data". The error is caught and printed so that "Run All" does not stop here.
try:
    pylab.imshow(digits.data[0])
except TypeError as error:
    print('imshow failed: {}'.format(error))
    # Drop the empty figure that was created before the error was raised.
    pylab.close()



In [ ]:

    
# Shape of the first sample's feature vector.
np.shape(digits.data[0])



In [ ]:

    
# View the first feature vector as an 8x8 grid.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(digits.data[0].reshape(8, 8))



In [ ]:

    
# Confirm the shape after reshaping the first feature vector to 8x8.
np.reshape(digits.data[0], (8, 8)).shape



In [ ]:

    
# Once reshaped to 8x8 the first sample can be rendered as an image.
pylab.imshow(np.reshape(digits.data[0], (8, 8)))



In [ ]:

    
# List the fields available on the dataset object.
# list(...) keeps the output a plain list on both Python 2 and Python 3.
print(list(digits.keys()))



In [ ]:

    
# The `images` field holds the first sample already in 2-D form.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(digits.images[0])



In [ ]:

    
# Render the first entry of the `images` field directly, no reshape needed.
pylab.imshow(digits['images'][0])



In [ ]:

    
# 2x2 grid showing the same digit under different imshow settings.
# figsize is passed as a keyword: calling the IPython figsize(...) helper
# instead would overwrite the global default figure size and hand None to
# figure().
pyplot.figure(figsize = (8, 8))

# Top-left: default colormap.
pyplot.subplot(2, 2, 1)
pylab.imshow(digits.images[0])

# Top-right: 'hot' colormap.
pyplot.subplot(2, 2, 2)
pylab.imshow(digits.images[0], cmap = 'hot')

# Bottom-left: grayscale.
pyplot.subplot(2, 2, 3)
pylab.imshow(digits.images[0], cmap = 'gray')

# Bottom-right: grayscale with nearest-neighbour interpolation.
pyplot.subplot(2, 2, 4)
pylab.imshow(digits.images[0], cmap = 'gray', interpolation = 'nearest')



In [ ]:

    
# Show the first ten images in a 2x5 grid, each titled with its label.
# figsize is passed as a keyword so the global default figure size is untouched.
pyplot.figure(figsize = (20, 8))

# The loop variable is named `image`, not `plot`, so it does not overwrite the
# `plot` function that %pylab places in the namespace.
for plot_number, (image, target) in enumerate(zip(digits.images[:10], digits.target[:10])):
    pyplot.subplot(2, 5, plot_number + 1)
    pylab.imshow(image, cmap = 'gray')
    pylab.title('digit: ' + str(target))

Уменьшение размерности



In [ ]:

    
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

from collections import  Counter



In [ ]:

    
# Work with the first 1000 samples and their labels in the cells below.
data, labels = digits.data[:1000], digits.target[:1000]



In [ ]:

    
# Count how many samples each label has in the selected subset.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(Counter(labels))



In [ ]:

    
# Bar chart of the number of samples per label.
# The counts are computed once and turned into plain, sorted lists, so bar()
# never receives dict views and the bars are in label order.
label_counts = Counter(labels)
digit_classes = sorted(label_counts)
digit_counts = [label_counts[digit] for digit in digit_classes]

pylab.figure(figsize = (10, 6))
pylab.bar(digit_classes, digit_counts)



In [ ]:

    
# k-nearest-neighbours classifier reused for every embedding below;
# n_neighbors = 5 spells out the library default.
classifier = KNeighborsClassifier(n_neighbors = 5)



In [ ]:

    
# Fit on the original (non-reduced) feature vectors.
classifier.fit(X = data, y = labels)



In [ ]:

    
# classification_report takes the ground truth first and the predictions
# second; swapping them exchanges precision and recall for every class.
# Note: `data` is also what the classifier was fitted on in this notebook, so
# these are training-set scores.
print(classification_report(labels, classifier.predict(data)))

Random projection



In [ ]:

    
from sklearn import random_projection



In [ ]:

    
# Sparse random projection of the samples down to two components, with a
# pinned seed.
projection = random_projection.SparseRandomProjection(
    random_state = 0,
    n_components = 2,
)
data_2d_rp = projection.fit_transform(data)



In [ ]:

    
# Scatter plot of the two random-projection components, coloured by label.
pylab.figure(figsize = (10, 6))
pylab.scatter(x = data_2d_rp[:, 0], y = data_2d_rp[:, 1], c = labels)



In [ ]:

    
# Refit the classifier on the random-projection embedding and score it on the
# same samples (training-set scores).
classifier.fit(data_2d_rp, labels)
# classification_report takes the ground truth first and the predictions
# second; swapping them exchanges precision and recall for every class.
print(classification_report(labels, classifier.predict(data_2d_rp)))

PCA



In [ ]:

    
try:
    from sklearn.decomposition import RandomizedPCA
except ImportError:
    # RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20.
    # PCA with svd_solver='randomized' is the documented replacement, so the
    # old name is rebuilt on top of it and the cells below keep working.
    from functools import partial
    from sklearn.decomposition import PCA
    RandomizedPCA = partial(PCA, svd_solver = 'randomized')



In [ ]:

    
# Reduce the samples to two components with RandomizedPCA, with a pinned seed.
pca = RandomizedPCA(
    random_state = 0,
    n_components = 2,
)
data_2d_pca = pca.fit_transform(data)



In [ ]:

    
# Scatter plot of the two PCA components, coloured by label.
pylab.figure(figsize = (10, 6))
pylab.scatter(x = data_2d_pca[:, 0], y = data_2d_pca[:, 1], c = labels)



In [ ]:

    
# Refit the classifier on the PCA embedding and score it on the same samples
# (training-set scores).
classifier.fit(data_2d_pca, labels)
# classification_report takes the ground truth first and the predictions
# second; swapping them exchanges precision and recall for every class.
print(classification_report(labels, classifier.predict(data_2d_pca)))

MDS



In [ ]:

    
from sklearn import manifold



In [ ]:

    
# Two-component MDS embedding with a single initialisation and at most 100
# iterations. random_state is pinned, as for the other embeddings in this
# notebook, so that re-running the cell reproduces the same layout.
mds = manifold.MDS(n_components = 2, n_init = 1, max_iter = 100, random_state = 0)
data_2d_mds = mds.fit_transform(data)



In [ ]:

    
# Scatter plot of the two MDS components, coloured by label.
pylab.figure(figsize = (10, 6))
pylab.scatter(x = data_2d_mds[:, 0], y = data_2d_mds[:, 1], c = labels)



In [ ]:

    
# Refit the classifier on the MDS embedding and score it on the same samples
# (training-set scores).
classifier.fit(data_2d_mds, labels)
# classification_report takes the ground truth first and the predictions
# second; swapping them exchanges precision and recall for every class.
print(classification_report(labels, classifier.predict(data_2d_mds)))

t-SNE



In [ ]:

    
# Two-component t-SNE embedding, initialised with 'pca' and a pinned seed.
tsne = manifold.TSNE(
    init = 'pca',
    n_components = 2,
    random_state = 0,
)
data_2d_tsne = tsne.fit_transform(data)



In [ ]:

    
# Scatter plot of the two t-SNE components, coloured by label.
pylab.figure(figsize = (10, 6))
pylab.scatter(x = data_2d_tsne[:, 0], y = data_2d_tsne[:, 1], c = labels)



In [ ]:

    
# Refit the classifier on the t-SNE embedding and score it on the same samples
# (training-set scores).
classifier.fit(data_2d_tsne, labels)
# classification_report takes the ground truth first and the predictions
# second; swapping them exchanges precision and recall for every class.
print(classification_report(labels, classifier.predict(data_2d_tsne)))